import pandas as pd
import numpy as np
import statsmodels.api as sm
from math import sqrt


# Define the file path to the research project's replication package 
filepath = r'C:\Users\mehran.azimi\\'

# output file name
file_name='Inv.xlsx'


def predict_var(_var,hist_ep=0):
    y_temp=np.array([])
    for i in range(0,nof_forecasts): 
        x1=np.array( all_data1[indepvar ].iloc[rolling*i:initsampsize+i-taw].astype(float))
        X = sm.add_constant(x1)
        y=np.array(all_data1[_var].iloc[taw + rolling*i:initsampsize+i])
        model = sm.OLS(y, X)
        results = model.fit()
        try:
            yhat=results._results.params[0] + np.dot(results._results.params[1:], np.array(  all_data1[indepvar].iloc[initsampsize+i-taw] )  )  
        except:
            yhat=np.nan
        y_temp = np.append(y_temp, yhat)
    return y_temp

def Z_jk_test(series):
    sigma_i=(prediction['ret_prediction']+prediction['rf']).std(); mu_i=prediction['ret_prediction'].mean()
    sigma_n=(prediction[series]+prediction['rf']).std(); mu_n=prediction[series].mean()
    sigma_i_n=prediction['ret_prediction'].cov(prediction[series])
    teta_hat=(1/len(prediction['ret_prediction']) )* ( 2*sigma_i**2 * sigma_n**2 -2*sigma_i*sigma_n*sigma_i_n + 0.5*mu_i**2 * sigma_n**2 + 0.5*mu_n**2 * sigma_i**2 - mu_i*mu_n*sigma_i_n**2/sigma_i/sigma_n  )
    try:
        Z_jk=(sigma_n*mu_i-sigma_i*mu_n) / (sqrt(teta_hat))
    except:
        return np.nan
    return Z_jk

 

begdate=199002 
enddate=202212   
w_low, w_high=(0,2)


data_Inv=pd.read_csv(filepath + 'AllData.csv')
data_Inv=data_Inv[ (data_Inv['yearmo']>=begdate) & (data_Inv['yearmo']<=enddate)  ]
data_Inv=data_Inv.reset_index()

_vars=['sii','gp', 'alpha', 'ogap_dyn', 'skvw', 'dy', 'dp', 'lty', 'avgcor', 'wtexas', 'bm', 'tbl', 'vrp', 'tail', 'ep', 'svar', 'infl', 'tms', 'eg', 'dfy', 'de', 'ndrbl', 'ntis', 'fbm','lzrt','dtoat','dfr','ltr']
date_filter = data_Inv['date'] == '12/31/2022'  
indepvars_2022 = data_Inv[  data_Inv['date']=='12/31/2022' ][_vars].dropna(axis=1).columns.to_list()
indepvars_2021 = list(set(_vars) - set(indepvars_2022) ) + ['alpha']

# To get results for variables that end in 2021, substitute indepvars_2022 with indepvars_2021 below
all_data = data_Inv.copy()
all_data[ ['mkt_1', 'rf', 'mktrf'] ] = all_data[ ['Mkt','RF','MktRF']  ]/100
all_data['mkt']=all_data['Mkt']/100

inv_strategy=pd.DataFrame(columns=['Predictor','W_avrg','Ret_avrg', 'SR','SR_bah', 'Z_jk_bah','rolling','Risk_aversion' ,'var_method'])
all_series=dict()
for var_method in ["Historical",'GARCH']:
    depvar='mktrf'
    initsampsize = 198
    taw = 1
    for indepvar in [ [x] for x in  indepvars_2022  ]:
        for rolling in [0,1]:
            for risk_aversion in [2,3,4]:
                series_name= f"{depvar}--{indepvar}--{var_method}--{initsampsize}--rolling{rolling}--ra{risk_aversion}--taw{taw}--W{w_low}to{w_high}"
                all_data1=all_data[['monthnum','yearmo','rf', 'mktrf', 'mkt', 'std_5y_rolling','q2','dvar']+indepvar].copy()
                all_data1.dropna(inplace=True,subset=[depvar]+indepvar)
                all_data1.reset_index(drop=True,inplace=True)
                nof_forecasts=len(all_data1)-initsampsize
                prediction=pd.DataFrame()
                prediction=( all_data1[indepvar+['monthnum','yearmo','rf','mktrf','mkt','std_5y_rolling','q2','dvar']].iloc[initsampsize: ]).reset_index(drop=True)

                prediction['predicted']=predict_var(depvar)
                if var_method=="GARCH":
                    if "mkt" in depvar:
                        denum='q2'
                    else:
                        denum='error'
                elif var_method=="Historical":
                    if "mkt" in depvar:
                        denum='dvar'
                    else:
                        denum='error'

                prediction['weight']=np.clip(prediction['predicted']/prediction[denum]/risk_aversion,w_low,w_high)
                prediction['ret_prediction'] =prediction['weight'] *prediction['mktrf']
                all_series[series_name] = (prediction[['monthnum','yearmo','ret_prediction']].copy()).rename(columns={'ret_prediction': series_name})
                SR=sqrt(12)*prediction['ret_prediction'].mean() / (prediction['ret_prediction']+prediction['rf']).std(ddof=0)
                SR_bah=sqrt(12)*prediction['mktrf'].mean() / (prediction['mkt']).std(ddof=0)
                Z_jk_bah = Z_jk_test('mktrf')
                inv_strategy.loc[len(inv_strategy.index)]= [indepvar, prediction['weight'].mean() ,
                                    100*prediction['ret_prediction'].mean() , 
                                    SR,SR_bah,  Z_jk_bah,
                                    rolling,risk_aversion ,
                                    var_method]

inv_strategy.to_excel(filepath+file_name, index=False)


